library(tidyverse)
library(rnoaa)
library(ggridges)

# Pull 2017 precipitation and min/max temperature records for three NOAA
# monitors, attach a readable station name, and rescale the temperatures.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # Raw temperatures are divided by 10 (NOTE(review): presumably tenths of
    # a degree C -> degrees C; confirm against the rnoaa documentation).
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  select(name, id, everything())

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(weather_df)
}

Start a plot

Blank plot

# Data and aesthetics only, no geom layer yet.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax))

Scatterplot

# Scatterplot of tmax against tmin.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

# Same scatterplot, restricted to the rows named "CentralPark_NY".
ggplot(
  data = filter(weather_df, name == "CentralPark_NY"),
  mapping = aes(x = tmin, y = tmax)
) +
  geom_point()

# A finished plot can be stored in an object instead of being printed.
weather_sp <-
  ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()

# Store only the base (data + aesthetics) so geoms can be added afterwards.
plot_weather <- ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax))

plot_weather + geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

More plot options

Add an aesthetic

# Map `name` to color, in the point layer only.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name))
## Warning: Removed 15 rows containing missing values (geom_point).

Add a geom add a smooth line (global function)

se = FALSE removes the confidence band around the smooth line

# Colored points plus one smooth; the smooth layer has no color mapping.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Add some options …

# As above, with semi-transparent points (alpha set on the point layer).
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name), alpha = 0.4) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

alpha is set inside geom_point(), so it applies to the points only, not to the geom_smooth() line

Here color = name is a global mapping: it is set in ggplot(), so every geom inherits it

# Color mapped in ggplot(), so both the points and the smooth inherit it.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax, color = name)
) +
  geom_point(alpha = 0.4) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

# Color mapped separately inside each geom rather than once in ggplot().
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name), alpha = 0.4) +
  geom_smooth(mapping = aes(color = name), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Same result (but less straightforward, because the color mapping is repeated in each geom)

# Color mapped to the `prcp` column instead of the station name.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax, color = prcp)
) +
  geom_point(alpha = 0.4)
## Warning: Removed 15 rows containing missing values (geom_point).

Faceting …

# One panel per `name`, laid out as columns.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax, color = name)
) +
  geom_point(alpha = 0.4) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

A more interesting plot

Precipitation

# tmax over date, point size mapped to prcp, one panel per `name`.
ggplot(
  data = weather_df,
  mapping = aes(x = date, y = tmax, color = name, size = prcp)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

# tmax over date as a line, one color per `name`.
ggplot(
  data = weather_df,
  mapping = aes(x = date, y = tmax, color = name)
) +
  geom_line()

Learning Assessment

# Central Park rows only: rescale both temperatures with (x * 9 / 5) + 32,
# then plot tmin_f against tmax_f with a linear fit and no confidence band.
weather_df %>%
  filter(name == "CentralPark_NY") %>%
  mutate(
    tmax_f = (tmax * 9 / 5) + 32,
    tmin_f = (tmin * 9 / 5) + 32
  ) %>%
  ggplot(mapping = aes(x = tmax_f, y = tmin_f)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = lm, se = FALSE)

# Open the help page for geom_smooth.
help("geom_smooth")

Tips and tricks

# Smooth curves only, without the underlying points.
ggplot(
  data = weather_df,
  mapping = aes(x = date, y = tmax, color = name)
) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

To avoid overplotting when there are lots of data points (e.g., 1,000,000)

# Hexagonal binning of tmin against tmax.
ggplot(data = weather_df, mapping = aes(x = tmax, y = tmin)) +
  geom_hex()
## Warning: Removed 15 rows containing non-finite values (stat_binhex).

# Rectangular 2D binning of tmin against tmax.
ggplot(data = weather_df, mapping = aes(x = tmax, y = tmin)) +
  geom_bin2d()
## Warning: Removed 15 rows containing non-finite values (stat_bin2d).

# 2D density contours of tmin against tmax.
ggplot(data = weather_df, mapping = aes(x = tmax, y = tmin)) +
  geom_density2d()
## Warning: Removed 15 rows containing non-finite values (stat_density2d).

Why the two lines below don’t produce the same result:

# Color set to "blue" outside aes(): a constant applied to every point.
ggplot(data = weather_df) +
  geom_point(mapping = aes(x = tmax, y = tmin), color = "blue")
## Warning: Removed 15 rows containing missing values (geom_point).

# Color set to "blue" inside aes(): this implicitly creates a color variable
# whose value is "blue" everywhere; ggplot then assigns colors according to
# that variable using the default color scheme.
ggplot(data = weather_df) +
  geom_point(mapping = aes(x = tmax, y = tmin, color = "blue"))
## Warning: Removed 15 rows containing missing values (geom_point).

Univariate plots

Histograms

# Histogram of tmax with the default binning.
ggplot(data = weather_df, mapping = aes(x = tmax)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Histogram of tmax, one panel per `name`.
ggplot(data = weather_df, mapping = aes(x = tmax)) +
  geom_histogram() +
  facet_grid(. ~ name)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Histogram of tmax with bars filled by `name`.
ggplot(data = weather_df, mapping = aes(x = tmax, fill = name)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

position = "dodge" places the bars for each group side by side

# Dodged histogram of tmax with an explicit bin width of 2.
ggplot(data = weather_df, mapping = aes(x = tmax, fill = name)) +
  geom_histogram(binwidth = 2, position = "dodge")
## Warning: Removed 3 rows containing non-finite values (stat_bin).

Density plot

# Overlaid, semi-transparent density curves of tmax, filled by `name`.
ggplot(data = weather_df, mapping = aes(x = tmax, fill = name)) +
  geom_density(alpha = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).

# Density of tmax with adjust = 0.5 and a blue outline.
ggplot(data = weather_df, mapping = aes(x = tmax, fill = name)) +
  geom_density(color = "blue", adjust = 0.5, alpha = 0.4)
## Warning: Removed 3 rows containing non-finite values (stat_density).

Boxplots

# Boxplot of tmax for each `name`.
ggplot(data = weather_df, mapping = aes(x = name, y = tmax)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

Violin plot

# Violin plot of tmax for each `name`.
ggplot(data = weather_df, mapping = aes(x = name, y = tmax)) +
  geom_violin()
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

# Filled violin plot of tmax per `name`, with the median of each group
# overlaid as a blue point.
ggplot(data = weather_df, mapping = aes(x = name, y = tmax)) +
  geom_violin(mapping = aes(fill = name), color = "blue", alpha = 0.5) +
  # `fun.y` was deprecated in ggplot2 3.3.0; `fun` is its replacement.
  stat_summary(fun = median, geom = "point", color = "blue", size = 3)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_summary).

Ridge plots

# Ridge plot: one tmax density per `name`, stacked along the y axis.
ggplot(data = weather_df, mapping = aes(x = tmax, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

Learning Assessment: compare precipitation across locations

highly skewed!

Make a bunch of plots

# Boxplot of prcp for each `name`.
ggplot(data = weather_df, mapping = aes(x = name, y = prcp)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

Violin plot: even worse

# Violin plot of prcp for each `name`.
ggplot(data = weather_df, mapping = aes(x = name, y = prcp)) +
  geom_violin()
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

# Overlaid density curves of prcp, filled by `name`.
ggplot(data = weather_df, mapping = aes(x = prcp)) +
  geom_density(mapping = aes(fill = name), alpha = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).

# Histogram of prcp across all rows, default binning.
ggplot(data = weather_df, mapping = aes(x = prcp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Ridge plot of prcp, one density per `name`.
ggplot(data = weather_df, mapping = aes(x = prcp, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 4.61
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

Complement this with a plot showing only precipitation values less than 100, or with data omitting days with no precipitation

# Ridge plot of prcp using only the rows where prcp is greater than zero.
ggplot(
  data = filter(weather_df, prcp > 0),
  mapping = aes(x = prcp, y = name)
) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 19.7

Saving plots

# Build the tmax-vs-tmin scatterplot, colored by `name`, and keep it in an
# object so it can be written to disk.
weather_plot <-
  ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name), alpha = 0.5)

# Write the stored plot to a PDF, 8 wide by 5 high.
ggsave(
  filename = "weather_plot.pdf",
  plot = weather_plot,
  width = 8,
  height = 5
)
## Warning: Removed 15 rows containing missing values (geom_point).